library(ggplot2)
library(readxl)
library(readr)
library(tidyverse)
library(dplyr)
library(sf)
library(plotly)
library(geojsonio)
load dataset
df_descriptive=read_csv("filtered_merged_dataset_sample.csv")
data_final <- read_csv("data_final.csv")
Load spatial data (replace with actual shapefile path)
cdta_shape = st_read("nycdta2020_24d/nycdta2020.shp")
## Reading layer `nycdta2020' from data source
## `D:\Rproject\ZebangZHANGstat.github.io\nycdta2020_24d\nycdta2020.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 71 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
boro_shape = st_read("Borough Boundaries/geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e.shp")
## Reading layer `geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e' from data source `D:\Rproject\ZebangZHANGstat.github.io\Borough Boundaries\geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS 84
# There are 3 NA in the dataset, since there are no incident in these CDTAs
# Identify Missing Matches
unmatched_cdta <- setdiff(cdta_shape$CDTA2020, data_final$CDTA)
# Assign 0 to Missing Areas
cdta_incident_counts <- data_final %>%
group_by(CDTA) %>%
summarise(Number_of_Incidents = n(), .groups = "drop") %>%
complete(CDTA = unique(cdta_shape$CDTA2020), fill = list(Number_of_Incidents = 0))
#Re-Merge the Data
cdta_map_data <- cdta_shape %>%
left_join(cdta_incident_counts, by = c("CDTA2020" = "CDTA"))
# Update NA Handling
cdta_map_data <- cdta_map_data %>%
mutate(
Number_of_Incidents = ifelse(is.na(Number_of_Incidents), 0, Number_of_Incidents),
Incident_Range = cut(
Number_of_Incidents,
breaks = seq(0, 600, by = 120),
labels = c("0-120", "121-240", "241-360", "361-480", "481-600"),
include.lowest = TRUE
)
)
# Count the boro incident
boro_incident_counts <- data_final %>%
group_by(BORO) %>%
summarise(Number_of_Incidents = n(), .groups = "drop") %>%
mutate(BORO = tolower(BORO) )
# Lowercase the boro in boro_shape
boro_shape = boro_shape %>%
mutate(boro_name = tolower(boro_name))
# Merge spatial data with incident counts
boro_map_data <- boro_shape %>%
left_join(boro_incident_counts, by = c("boro_name" = "BORO"))
# Create custom breaks for Number_of_Incidents
boro_map_data <- boro_map_data %>%
mutate(
Incident_Range = cut(
Number_of_Incidents,
breaks = seq(0, 4000, by = 800), # Breaks from 0 to 4000, every 800 cases
labels = c("0-800", "801-1600", "1601-2400", "2401-3200", "3201-4000"),
include.lowest = TRUE
)
)
boro_map_data <- boro_map_data %>%
mutate(hover_text = paste("Borough:", boro_name,
"<br>Number of Incidents:", Number_of_Incidents))
# Interactive plot using plotly
plot <- plot_ly() %>%
add_sf(
data = boro_map_data,
split = ~boro_name, # Separate polygons by boroughs
color = ~Number_of_Incidents, # Color based on the number of incidents
colors = "viridis", # Color palette
text = ~hover_text, # Hover text
hoverinfo = "text"
) %>%
layout(
title = "Total Number of Incidents Across NYC BOROs (2017-2023)",
geo = list(
resolution = 50,
showland = TRUE,
landcolor = "rgb(217, 217, 217)",
showlakes = TRUE,
lakecolor = "rgb(173, 216, 230)",
projection = list(type = "mercator")
)
)
# Display the plot
plot
<<<<<<< HEAD
=======